// Scintilla source code edit control
/** @file CharClassify.h
 ** Character classifications used by Document and RESearch.
 **/
// Copyright 2006-2009 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#pragma once

namespace Scintilla {

// Report whether codePage is one of the double-byte (DBCS) code pages
// recognised here. The numbers are Windows code page identifiers:
// 932 (Japanese Shift-JIS), 936 (Simplified Chinese GBK), 949 (Korean
// Unified Hangul Code), 950 (Traditional Chinese Big5) and 1361 (Korean Johab).
constexpr bool IsDBCSCodePage(int codePage) noexcept {
	switch (codePage) {
	case 932:
	case 936:
	case 949:
	case 950:
	case 1361:
		return true;
	default:
		return false;
	}
}

/**
 * Character classification.
 * Each instance holds a table assigning one class to each of the 256
 * possible byte values; the static members classify Unicode code points
 * using tables shared by all instances.
 **/
class CharClassify {
public:
	// Defined outside this header.
	CharClassify() noexcept;

	// Character classes. The tables in this class store these values as
	// unsigned char and convert back with static_cast.
	enum cc {
		ccSpace, ccNewLine, ccWord, ccPunctuation, ccCJKWord
	};
	// The four members below are only declared here; their definitions are not
	// part of this header.
	// NOTE(review): judging by the names, they reset, update and query the
	// per-instance charClass table -- confirm against the implementation.
	void SetDefaultCharClasses(bool includeWordClass) noexcept;
	void SetCharClasses(const unsigned char *chars, cc newCharClass) noexcept;
	void SetCharClassesEx(const unsigned char *chars, int length) noexcept;
	int GetCharsOfClass(cc characterClass, unsigned char *buffer) const noexcept;
	// Class recorded in this instance's table for the byte value ch.
	cc GetClass(unsigned char ch) const noexcept {
		return static_cast<cc>(charClass[ch]);
	}
	// True when this instance's table records ccWord for the byte value ch.
	bool IsWord(unsigned char ch) const noexcept {
		return static_cast<cc>(charClass[ch]) == ccWord;
	}

	// Declared here, defined elsewhere.
	// NOTE(review): presumably fills classifyMap before ClassifyCharacter is
	// used -- confirm against the implementation.
	static void InitUnicodeData() noexcept;

	// ClassifyCharacter (in the generated section below) maps a code point to a class:
	//  - values below sizeof(classifyMap), i.e. 0x10000, are read directly from classifyMap;
	//  - values above maxUnicode return ccSpace;
	//  - the remaining values are rebased by subtracting sizeof(classifyMap) and
	//    resolved through three chained lookups in CharClassifyTable: the first
	//    stage is indexed by the rebased value shifted right by 10, the second
	//    stage starts at index 1024 and the final class bytes start at index 3392.
	// The code between the Autogenerated markers is produced by a script, so it
	// should be regenerated rather than edited by hand.
//++Autogenerated -- start of section automatically generated
// Created with Python 3.8.2, Unicode 12.1.0
	static cc ClassifyCharacter(unsigned int ch) noexcept {
		if (ch < sizeof(classifyMap)) {
			return static_cast<cc>(classifyMap[ch]);
		}
		if (ch > maxUnicode) {
			// Cn
			return ccSpace;
		}

		ch -= sizeof(classifyMap);
		ch = (CharClassifyTable[ch >> 10] << 8) | (ch & 1023);
		ch = (CharClassifyTable[1024 + (ch >> 4)] << 4) | (ch & 15);
		return static_cast<cc>(CharClassifyTable[3392 + ch]);
	}
//--Autogenerated -- end of section automatically generated

private:
	// Largest value ClassifyCharacter looks up; anything above it yields ccSpace.
	static constexpr unsigned int maxUnicode = 0x10ffff;
	// Multi-stage lookup table used by ClassifyCharacter for values of 0x10000
	// and above; defined outside this header.
	static const unsigned char CharClassifyTable[];
	// One class byte for each value in 0..0xffff; non-const and defined outside
	// this header.
	static unsigned char classifyMap[0xffff + 1];

	// Number of entries in charClass: one for every unsigned char value.
	enum {
		maxChar = 256
	};
	// Per-instance class table indexed by byte value.
	unsigned char charClass[maxChar];    // not type cc to save space
};

/**
 * Byte and character classification tables for a single double-byte
 * (DBCS) code page.
 * The constructor is private, so callers obtain an instance through Get().
 **/
class DBCSCharClassify {
public:
	// Returns a pointer to the classifier for codePage.
	// Declared here only; the definition lives outside this header.
	static const DBCSCharClassify* Get(int codePage);

	// Code page this classifier was constructed for.
	constexpr int CodePage() const noexcept {
		return codePage;
	}

	// Per-byte flags, each read straight from the matching 256-entry table.

	// Flag recorded for ch in the lead byte table.
	bool IsLeadByte(unsigned char ch) const noexcept {
		return leadByte[ch];
	}
	// Flag recorded for ch in the invalid lead byte table.
	bool IsLeadByteInvalid(unsigned char ch) const noexcept {
		return invalidLeadByte[ch];
	}
	// Flag recorded for ch in the invalid trail byte table.
	bool IsTrailByteInvalid(unsigned char ch) const noexcept {
		return invalidTrailByte[ch];
	}

	// Class of the character value ch. Values that fall outside classifyMap
	// are reported as ccSpace (Cn).
	CharClassify::cc ClassifyCharacter(unsigned int ch) const noexcept {
		if (ch >= sizeof(classifyMap)) {
			// Cn
			return CharClassify::ccSpace;
		}
		return static_cast<CharClassify::cc>(classifyMap[ch]);
	}

private:
	// Defined outside this header.
	explicit DBCSCharClassify(int codePage_) noexcept;

	const int codePage;
	// Tables indexed by byte value.
	bool leadByte[256];
	bool invalidLeadByte[256];
	bool invalidTrailByte[256];
	// One class byte for each character value in 0..0xffff.
	unsigned char classifyMap[0xffff + 1];
};

}
